In [2]:
from tensorflow import keras
In [3]:
# Download (or read from the local cache) Fashion-MNIST and unpack it
# directly into the train and test splits.
(X_train_full, y_train_full), (X_test_full, y_test_full) = \
    keras.datasets.fashion_mnist.load_data()
In [4]:
X_train_full.shape
Out[4]:
(60000, 28, 28)
In [5]:
X_test_full.shape
Out[5]:
(10000, 28, 28)

The full dataset of 70,000 images is separated into 60,000 for training and 10,000 for testing, and each image is a 28-by-28 2D array.

Now we need to create a validation set and scale the inputs for the neural network.

Scaling is done by dividing each pixel value by the largest possible value, 255, since pixel intensities range from 0 to 255.

In [6]:
# Hold out the first 5,000 training images for validation and scale pixel
# intensities from integer [0, 255] down to float [0.0, 1.0] for the network.
X_valid = X_train_full[:5000] / 255.0
X_train = X_train_full[5000:] / 255.0
y_valid = y_train_full[:5000]
y_train = y_train_full[5000:]
In [7]:
X_test=X_test_full/255.0 #scale test pixels to [0, 1] with the same factor used for the training data
y_test=y_test_full #labels stay as integer class ids (0-9)

The first 5,000 images form the validation set, and the remaining 55,000 of the 60,000 form the training set.

Creating the model using sequential API¶

In [8]:
# Build the network incrementally: flatten each 28x28 image to a 784-vector,
# then two ReLU hidden layers, then a 10-way softmax output (one unit per class).
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
for units, act in [(300, 'relu'), (100, 'relu'), (10, 'softmax')]:
    model.add(keras.layers.Dense(units, activation=act))

1-Create the sequential model(single stack of layers connected sequentially)

2-First layer is used for data preprocessing (flattening, i.e. converting each 2D 28×28 image into a 1D 784-element vector)

3-Dense hidden layer with 300 neurons, using the ReLU activation function, which is the common default

4-Second hidden layer

5-Dense output layer with 10 neurons since there are 10 classes, using softmax (because the classes are exclusive and this is a multiclass classification problem)

Alternatively, we can pass a list of layers when creating the model, as below

In [9]:
# Same architecture, this time handing the Sequential constructor the full
# stack of layers as a single list instead of calling add() repeatedly.
layer_stack = [
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation='relu'),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(10, activation='softmax'),
]
model = keras.models.Sequential(layer_stack)
In [10]:
model.summary() #all the model layers(None means batch size can be anything)
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ flatten_1 (Flatten)             │ (None, 784)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_3 (Dense)                 │ (None, 300)            │       235,500 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_4 (Dense)                 │ (None, 100)            │        30,100 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_5 (Dense)                 │ (None, 10)             │         1,010 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 266,610 (1.02 MB)
 Trainable params: 266,610 (1.02 MB)
 Non-trainable params: 0 (0.00 B)
In [11]:
model.layers #access all the layers
Out[11]:
[<Flatten name=flatten_1, built=True>,
 <Dense name=dense_3, built=True>,
 <Dense name=dense_4, built=True>,
 <Dense name=dense_5, built=True>]
In [12]:
hidden1=model.layers[1] #layers can be indexed; index 1 is the first Dense layer (index 0 is the Flatten layer)
hidden1.name #auto-generated unique name of the layer
Out[12]:
'dense_3'
In [13]:
weights,biases=hidden1.get_weights() #get ech layers parameters by calling get_weights funtion
In [14]:
weights
Out[14]:
array([[-0.00329745,  0.055204  ,  0.066624  , ...,  0.0559136 ,
        -0.05367599,  0.02627006],
       [ 0.02993034,  0.0679446 ,  0.03813273, ..., -0.04538053,
         0.02183852,  0.00674835],
       [-0.0256972 ,  0.03161852,  0.07265496, ...,  0.0385574 ,
        -0.02168931,  0.04781467],
       ...,
       [ 0.03442346, -0.01825543,  0.01425272, ..., -0.00487291,
        -0.03699834, -0.03362278],
       [-0.07326835, -0.03743588, -0.03252079, ...,  0.0719367 ,
         0.04672862,  0.04777335],
       [-0.01954559, -0.0738189 ,  0.06967571, ...,  0.04953486,
         0.03712752,  0.02743707]], dtype=float32)

Compiling the model¶

In [15]:
# Configure training: sparse categorical cross-entropy because targets are
# integer class ids (not one-hot vectors), plain stochastic gradient descent,
# and accuracy reported alongside the loss each epoch.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

loss - sparse categorical cross-entropy, because we have sparse labels (for each instance there is only a single target class index)

optimizer-stochastic gradient descent

metrics - accuracy is a useful measure for this classification task

Training and evaluating the model¶

In [16]:
# Train for 30 epochs, evaluating on the held-out validation set at the end of
# each epoch; fit() returns a History object holding the per-epoch metrics.
history=model.fit(X_train,y_train,epochs=30,
                  validation_data=(X_valid,y_valid)) #instead of validation_data we can pass validation_split=0.1 etc. (use the last 10% of the data, before shuffling, for validation)
Epoch 1/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.6673 - loss: 1.0451 - val_accuracy: 0.8332 - val_loss: 0.4993
Epoch 2/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 4ms/step - accuracy: 0.8212 - loss: 0.5116 - val_accuracy: 0.8474 - val_loss: 0.4474
Epoch 3/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.8415 - loss: 0.4538 - val_accuracy: 0.8582 - val_loss: 0.4227
Epoch 4/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.8511 - loss: 0.4199 - val_accuracy: 0.8656 - val_loss: 0.3993
Epoch 5/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.8606 - loss: 0.3997 - val_accuracy: 0.8618 - val_loss: 0.3916
Epoch 6/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.8626 - loss: 0.3848 - val_accuracy: 0.8692 - val_loss: 0.3779
Epoch 7/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.8700 - loss: 0.3671 - val_accuracy: 0.8692 - val_loss: 0.3773
Epoch 8/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 4ms/step - accuracy: 0.8767 - loss: 0.3511 - val_accuracy: 0.8710 - val_loss: 0.3628
Epoch 9/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 4ms/step - accuracy: 0.8767 - loss: 0.3452 - val_accuracy: 0.8696 - val_loss: 0.3654
Epoch 10/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.8773 - loss: 0.3420 - val_accuracy: 0.8700 - val_loss: 0.3630
Epoch 11/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.8813 - loss: 0.3328 - val_accuracy: 0.8766 - val_loss: 0.3437
Epoch 12/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.8863 - loss: 0.3186 - val_accuracy: 0.8818 - val_loss: 0.3297
Epoch 13/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.8875 - loss: 0.3119 - val_accuracy: 0.8816 - val_loss: 0.3277
Epoch 14/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.8928 - loss: 0.3027 - val_accuracy: 0.8826 - val_loss: 0.3278
Epoch 15/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.8909 - loss: 0.3006 - val_accuracy: 0.8846 - val_loss: 0.3184
Epoch 16/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 9s 5ms/step - accuracy: 0.8964 - loss: 0.2892 - val_accuracy: 0.8848 - val_loss: 0.3201
Epoch 17/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.8971 - loss: 0.2854 - val_accuracy: 0.8826 - val_loss: 0.3180
Epoch 18/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 8s 4ms/step - accuracy: 0.8993 - loss: 0.2785 - val_accuracy: 0.8868 - val_loss: 0.3079
Epoch 19/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.9004 - loss: 0.2753 - val_accuracy: 0.8854 - val_loss: 0.3201
Epoch 20/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.9025 - loss: 0.2712 - val_accuracy: 0.8818 - val_loss: 0.3218
Epoch 21/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.9034 - loss: 0.2659 - val_accuracy: 0.8864 - val_loss: 0.3068
Epoch 22/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.9085 - loss: 0.2551 - val_accuracy: 0.8862 - val_loss: 0.3132
Epoch 23/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.9081 - loss: 0.2573 - val_accuracy: 0.8880 - val_loss: 0.3074
Epoch 24/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 4ms/step - accuracy: 0.9089 - loss: 0.2524 - val_accuracy: 0.8886 - val_loss: 0.3003
Epoch 25/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.9117 - loss: 0.2453 - val_accuracy: 0.8938 - val_loss: 0.2993
Epoch 26/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.9111 - loss: 0.2423 - val_accuracy: 0.8844 - val_loss: 0.3162
Epoch 27/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.9142 - loss: 0.2389 - val_accuracy: 0.8906 - val_loss: 0.3090
Epoch 28/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.9165 - loss: 0.2326 - val_accuracy: 0.8924 - val_loss: 0.3022
Epoch 29/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 4ms/step - accuracy: 0.9161 - loss: 0.2293 - val_accuracy: 0.8926 - val_loss: 0.3014
Epoch 30/30
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.9200 - loss: 0.2252 - val_accuracy: 0.8912 - val_loss: 0.2997

If the performance on the training set is much better than on the validation set, the model is most likely overfitting the training set (or there is a bug).

When calling the fit() method we can pass the class_weight argument if some classes are over- or under-represented; Keras will then weight the classes accordingly in the loss.

Training parameters¶

In [17]:
history.params
Out[17]:
{'verbose': 'auto', 'epochs': 30, 'steps': 1719}

List of epochs¶

In [18]:
history.epoch
Out[18]:
[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29]

Dictionary containing a list of loss and extra metrics it measured at the end of each epoch¶

In [19]:
history.history
Out[19]:
{'accuracy': [0.756672739982605,
  0.8289090991020203,
  0.8434908986091614,
  0.8537818193435669,
  0.8593636155128479,
  0.8651272654533386,
  0.8691818118095398,
  0.8733817934989929,
  0.876763641834259,
  0.8799818158149719,
  0.8820727467536926,
  0.8853999972343445,
  0.88718181848526,
  0.8910727500915527,
  0.8918545246124268,
  0.8947091102600098,
  0.8972545266151428,
  0.899327278137207,
  0.9008181691169739,
  0.903145432472229,
  0.9047818183898926,
  0.9071454405784607,
  0.9079272747039795,
  0.9095090627670288,
  0.9120000004768372,
  0.9125090837478638,
  0.914545476436615,
  0.916345477104187,
  0.9165818095207214,
  0.9180545210838318],
 'loss': [0.7378497123718262,
  0.48883289098739624,
  0.4449765384197235,
  0.4175524413585663,
  0.3985403776168823,
  0.3820713460445404,
  0.36844417452812195,
  0.3559795916080475,
  0.3463549017906189,
  0.3368052840232849,
  0.3285277485847473,
  0.31977418065071106,
  0.31097421050071716,
  0.30485057830810547,
  0.2986160218715668,
  0.2914504408836365,
  0.28552526235580444,
  0.2800866365432739,
  0.27491796016693115,
  0.2690766453742981,
  0.2633378207683563,
  0.25915294885635376,
  0.2552708387374878,
  0.25015559792518616,
  0.24597640335559845,
  0.24102188646793365,
  0.23761503398418427,
  0.23326702415943146,
  0.2296309769153595,
  0.22661589086055756],
 'val_accuracy': [0.8331999778747559,
  0.8474000096321106,
  0.8582000136375427,
  0.8655999898910522,
  0.8618000149726868,
  0.8691999912261963,
  0.8691999912261963,
  0.8709999918937683,
  0.8695999979972839,
  0.8700000047683716,
  0.8766000270843506,
  0.8817999958992004,
  0.881600022315979,
  0.8826000094413757,
  0.8845999836921692,
  0.8848000168800354,
  0.8826000094413757,
  0.8867999911308289,
  0.8853999972343445,
  0.8817999958992004,
  0.8863999843597412,
  0.8862000107765198,
  0.8880000114440918,
  0.8885999917984009,
  0.8938000202178955,
  0.8844000101089478,
  0.8906000256538391,
  0.8924000263214111,
  0.8925999999046326,
  0.8912000060081482],
 'val_loss': [0.49927231669425964,
  0.4473873972892761,
  0.42269834876060486,
  0.39929434657096863,
  0.39162975549697876,
  0.3779332935810089,
  0.3772680163383484,
  0.3627755641937256,
  0.36535489559173584,
  0.3629917502403259,
  0.3437325060367584,
  0.3296666741371155,
  0.3277183175086975,
  0.32775723934173584,
  0.31836092472076416,
  0.3201417326927185,
  0.317959189414978,
  0.30793848633766174,
  0.32010623812675476,
  0.321804940700531,
  0.3068252205848694,
  0.3132171034812927,
  0.30735939741134644,
  0.30028796195983887,
  0.29932069778442383,
  0.316245973110199,
  0.3089560270309448,
  0.3021879494190216,
  0.301445871591568,
  0.29968366026878357]}
In [20]:
import pandas as pd
import matplotlib.pyplot as plt

# Learning curves: per-epoch loss and accuracy for both training and validation.
learning_curves = pd.DataFrame(history.history)
ax = learning_curves.plot(figsize=(8, 5))
ax.grid(True)
ax.set_ylim(0, 1)  # pin the vertical range to [0, 1] so curves are comparable
Out[20]:
(0.0, 1.0)
No description has been provided for this image
In [21]:
model.evaluate(X_test,y_test)
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8812 - loss: 0.3327
Out[21]:
[0.33464494347572327, 0.8809999823570251]

Make predictions¶

In [ ]:
# Estimate class probabilities for the first three test images; each row of
# y_proba is a softmax distribution over the 10 classes (sums to 1).
X_new = X_test[:3]
y_proba = model.predict(X_new)
y_proba.round(2)  # rounded view for display only; y_proba itself keeps full precision
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 61ms/step
Out[ ]:
array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.01, 0.  , 0.98],
       [0.  , 0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ]],
      dtype=float32)

For each instance the model estimates one probability per class; the class with the highest probability is the predicted class.

In [ ]:
model.predict_classes(X_test[0])
In [ ]:
y_test[:3] #true labels of the first 3 test samples
y_test[:3] == y_proba.argmax(axis=1) #compare each predicted class (the highest-probability column) with the actual label
#model.save('my_model.h5') #save the model (commented out; .h5 is the legacy Keras save format)
Out[ ]:
array([ True,  True,  True])